Besides checking the results of DE analysis, also save the sets of DE genes out, to prepare for next step of gene set enrichment analysis.

R libraries.

library(ggplot2)
library(ggrepel)
library(ggpubr)
library(stringr)
library(MASS)
library(RColorBrewer)
library(DESeq2)

library(viridis)
library(ggpointdensity)
library(dplyr)
library(data.table)
library(ComplexHeatmap)
library(UpSetR)
library(readxl)
theme_set(theme_classic())

path = "../../data/MorPhiC/June-2024/JAX_RNAseq_Neuroectoderm/"
dat_path = paste0(path, "processed/release110-gencode44/Tables")

Read in meta data

From searching online, my guess is the model_organ content inside the bracket should be

dorsal forebrain patterning

instead of

dorsal forebrain pattering

But leave it as it is for now.

meta = fread("data/June_2024/JAX_RNAseq_Neuroectoderm_meta_data.tsv", 
             data.table = FALSE)
dim(meta)
## [1] 174  32
meta[1:2,]
##   biomaterial_id lib_biomaterial_id differentiated_biomaterial_id
## 1    MOK20002-WT         GT23-05635          CBO-MOK20002-WT-Day7
## 2    MOK20002-WT         GT23-05625          CBO-MOK20002-WT-Day6
##    differentiated_biomaterial_description differentiation_protocol
## 1 KOLF2.2 derived cortical brain organoid                 JAXPD003
## 2 KOLF2.2 derived cortical brain organoid                 JAXPD003
##   differentiated_timepoint_value differentiated_timepoint_unit
## 1                              7                          days
## 2                              6                          days
##   differentiated_terminally_differentiated
## 1                                       No
## 2                                       No
##                                   model_organ ko_strategy ko_gene
## 1 Cortical brain (dorsal forebrain pattering)          WT      WT
## 2 Cortical brain (dorsal forebrain pattering)          WT      WT
##   library_preparation_protocol dissociation_protocol fragment_size input_amount
## 1                     JAXPL001                   N.A           410          300
## 2                     JAXPL001                   N.A           425          300
##   input_unit final_yield final_yield_unit lib_concentration
## 1        ngs       249.6              ngs              51.8
## 2        ngs       170.0              ngs              33.8
##   lib_concentration_unit PCR_cycle PCR_cycle_sample_index
## 1                     nM        10                     NA
## 2                     nM        10                     NA
##                                             file_id sequencing_protocol
## 1 RFX_WT_Day7_GT23-05635_GCACGGAC-TGCGAGAC_S55_L002            JAXPS001
## 2 RFX_WT_Day6_GT23-05625_GACCTGAA-CTCACCAA_S97_L002            JAXPS001
##                       run_id biomaterial_description
## 1 20230424_23-scbct-028-run3                 KOLF2.2
## 2 20230424_23-scbct-028-run3                 KOLF2.2
##   derived_cell_line_accession clone_id protocol_id zygosity type model_system
## 1                    KOLF2.2J       WT         N.A      N.A iPSC          CBO
## 2                    KOLF2.2J       WT         N.A      N.A iPSC          CBO
names(meta)
##  [1] "biomaterial_id"                          
##  [2] "lib_biomaterial_id"                      
##  [3] "differentiated_biomaterial_id"           
##  [4] "differentiated_biomaterial_description"  
##  [5] "differentiation_protocol"                
##  [6] "differentiated_timepoint_value"          
##  [7] "differentiated_timepoint_unit"           
##  [8] "differentiated_terminally_differentiated"
##  [9] "model_organ"                             
## [10] "ko_strategy"                             
## [11] "ko_gene"                                 
## [12] "library_preparation_protocol"            
## [13] "dissociation_protocol"                   
## [14] "fragment_size"                           
## [15] "input_amount"                            
## [16] "input_unit"                              
## [17] "final_yield"                             
## [18] "final_yield_unit"                        
## [19] "lib_concentration"                       
## [20] "lib_concentration_unit"                  
## [21] "PCR_cycle"                               
## [22] "PCR_cycle_sample_index"                  
## [23] "file_id"                                 
## [24] "sequencing_protocol"                     
## [25] "run_id"                                  
## [26] "biomaterial_description"                 
## [27] "derived_cell_line_accession"             
## [28] "clone_id"                                
## [29] "protocol_id"                             
## [30] "zygosity"                                
## [31] "type"                                    
## [32] "model_system"
table(meta$model_organ, meta$ko_gene)
##                                              
##                                               FEZF2 LHX5 LHX9 OTX1 PAX6 RFX4 WT
##   Cortical brain (dorsal forebrain pattering)    36   36    9   27    9   36 21
table(meta$run_id, meta$ko_gene)
##                             
##                              FEZF2 LHX5 LHX9 OTX1 PAX6 RFX4 WT
##   20230228_23-scbct-008          0    0    0    0    9    0  1
##   20230424_23-scbct-028-run3     0    0    0    0    0   27  3
##   20230508_23-scbct-039-run2    27    0    0    0    0    0  3
##   20230522_23-scbct-052          0   27    0    0    0    0  3
##   20230824_23-scbct-092          0    0    9    0    0    0  1
##   20240131_23-robson-020         0    0    0   27    0    0  3
##   20240307_24-robson-003         0    0    0    0    0    9  1
##   20240307_24-robson-004         0    9    0    0    0    0  3
##   20240307_24-robson-006         9    0    0    0    0    0  3
table(meta$ko_strategy, meta$ko_gene)
##      
##       FEZF2 LHX5 LHX9 OTX1 PAX6 RFX4 WT
##   CE     12   12    3    9    3   12  0
##   KO     12   12    3    9    3   12  0
##   PTC    12   12    3    9    3   12  0
##   WT      0    0    0    0    0    0 21

Manually correct one mistake in the metadata

For the row with column differentiated_biomaterial_id==“CBO-MOK20004-WT-Day3”, it should have value 3 instead of the original 4 in the column differentiated_timepoint_value.

print("Before correcting the error in timepoint:")
## [1] "Before correcting the error in timepoint:"
print("Table between ko_gene and differentiated_timepoint_value:")
## [1] "Table between ko_gene and differentiated_timepoint_value:"
table(meta$ko_gene, meta$differentiated_timepoint_value)
##        
##         3 4 5 6 7 8 9 14
##   FEZF2 0 0 9 9 9 9 0  0
##   LHX5  9 9 9 9 0 0 0  0
##   LHX9  0 0 0 0 0 0 0  9
##   OTX1  0 0 0 0 9 9 9  0
##   PAX6  0 0 0 0 9 0 0  0
##   RFX4  0 0 0 9 9 9 9  0
##   WT    0 2 2 5 4 5 2  1
meta[which(meta$differentiated_biomaterial_id=="CBO-MOK20004-WT-Day3"), 
     "differentiated_timepoint_value"] = 3

print("After correcting the error in timepoint:")
## [1] "After correcting the error in timepoint:"
print("Table between ko_gene and differentiated_timepoint_value:")
## [1] "Table between ko_gene and differentiated_timepoint_value:"
table(meta$ko_gene, meta$differentiated_timepoint_value)
##        
##         3 4 5 6 7 8 9 14
##   FEZF2 0 0 9 9 9 9 0  0
##   LHX5  9 9 9 9 0 0 0  0
##   LHX9  0 0 0 0 0 0 0  9
##   OTX1  0 0 0 0 9 9 9  0
##   PAX6  0 0 0 0 9 0 0  0
##   RFX4  0 0 0 9 9 9 9  0
##   WT    1 1 2 5 4 5 2  1

Read in gene count data and filter genes

cts = fread(file.path(dat_path, "genesCounts.csv"), data.table = FALSE)
dim(cts)
## [1] 38592   175
cts[1:2, c(1:2, (ncol(cts)-1):ncol(cts))]
##              Name RFX_CE_F5_Day8_GT23-05641_GTCGGAGC-TTATAACC_S73_L002
## 1 ENSG00000268674                                                    0
## 2 ENSG00000271254                                                 1521
##   LHX5_CE_A1_Day3_GT23-07300_AAGTCCAA-TACTCATA_S163_L004
## 1                                                      0
## 2                                                   1270
##   LHX5_KO_E4_Day5_GT23-07318_AACGTTCC-AGTACTCC_S166_L004
## 1                                                      0
## 2                                                    746
stopifnot(setequal(names(cts)[-1], meta$file_id))

meta = meta[match(names(cts)[-1], meta$file_id),]
table(names(cts)[-1] == meta$file_id)
## 
## TRUE 
##  174
cts_dat = data.matrix(cts[,-1])
rownames(cts_dat) = cts$Name

Discard genes whose expression is 0 in more than 75% of samples

model_s = meta$model_system
table(model_s, useNA="ifany")
## model_s
## CBO 
## 174
table(meta$model_system, meta$model_organ, useNA="ifany")
##      
##       Cortical brain (dorsal forebrain pattering)
##   CBO                                         174
get_q75 <- function(x){quantile(x, probs = 0.75)}

# given that there is only one level for model_system
# the result from apply(cts_dat, 1, tapply, model_s, min) is no longer a matrix
# but a vector
# do not do transpose to the results from apply(cts_dat, 1, tapply, model_s, min)
gene_info = data.frame(Name = cts$Name, 
                       apply(cts_dat, 1, tapply, model_s, min), 
                       apply(cts_dat, 1, tapply, model_s, median), 
                       apply(cts_dat, 1, tapply, model_s, get_q75), 
                       apply(cts_dat, 1, tapply, model_s, max))

dim(gene_info)
## [1] 38592     5
gene_info[1:2, ]
##                            Name apply.cts_dat..1..tapply..model_s..min.
## ENSG00000268674 ENSG00000268674                                       0
## ENSG00000271254 ENSG00000271254                                     426
##                 apply.cts_dat..1..tapply..model_s..median.
## ENSG00000268674                                          0
## ENSG00000271254                                       1318
##                 apply.cts_dat..1..tapply..model_s..get_q75.
## ENSG00000268674                                        0.00
## ENSG00000271254                                     1701.75
##                 apply.cts_dat..1..tapply..model_s..max.
## ENSG00000268674                                       3
## ENSG00000271254                                    3117
table(row.names(gene_info)==gene_info$Name)
## 
##  TRUE 
## 38592
names(gene_info)[2:5] = paste0(rep(c("CBO_"), times=4), 
                               rep(c("min", "median", "q75", "max"), each=1))
dim(gene_info)
## [1] 38592     5
gene_info[1:2,]
##                            Name CBO_min CBO_median CBO_q75 CBO_max
## ENSG00000268674 ENSG00000268674       0          0    0.00       3
## ENSG00000271254 ENSG00000271254     426       1318 1701.75    3117
summary(gene_info[,-1])
##     CBO_min          CBO_median          CBO_q75            CBO_max       
##  Min.   :    0.0   Min.   :     0.0   Min.   :     0.0   Min.   :      0  
##  1st Qu.:    0.0   1st Qu.:     0.0   1st Qu.:     0.0   1st Qu.:      2  
##  Median :    0.0   Median :     3.0   Median :     5.8   Median :     22  
##  Mean   :  213.8   Mean   :   699.1   Mean   :   931.4   Mean   :   1829  
##  3rd Qu.:   89.0   3rd Qu.:   332.1   3rd Qu.:   474.6   3rd Qu.:   1003  
##  Max.   :88969.0   Max.   :489181.5   Max.   :586205.0   Max.   :1301990
table(gene_info$CBO_q75 > 0)
## 
## FALSE  TRUE 
## 14390 24202
w2kp = which(gene_info$CBO_q75 > 0)
cts_dat = cts_dat[w2kp,]
dim(cts_dat)
## [1] 24202   174
gene_info = gene_info[w2kp,]

Read in gene annotation

gene_anno = fread("data/gencode_v44_primary_assembly_info.tsv")
dim(gene_anno)
## [1] 62754     8
gene_anno[1:2,]
##                geneId  chr strand     start       end ensembl_gene_id
## 1: ENSG00000000003.16 chrX      - 100627108 100639991 ENSG00000000003
## 2:  ENSG00000000005.6 chrX      + 100584936 100599885 ENSG00000000005
##    hgnc_symbol                                       description
## 1:      TSPAN6 tetraspanin 6 [Source:HGNC Symbol;Acc:HGNC:11858]
## 2:        TNMD   tenomodulin [Source:HGNC Symbol;Acc:HGNC:17757]
# all genes are contained in the annotation file 
table(gene_info$Name %in% gene_anno$ensembl_gene_id)
## 
##  TRUE 
## 24202
gene_info = merge(gene_anno, gene_info, by.x="ensembl_gene_id", by.y = "Name", 
                  all.x = FALSE, all.y = TRUE)
dim(gene_info)
## [1] 24202    12
gene_info[1:2,]
##    ensembl_gene_id             geneId  chr strand     start       end
## 1: ENSG00000000003 ENSG00000000003.16 chrX      - 100627108 100639991
## 2: ENSG00000000005  ENSG00000000005.6 chrX      + 100584936 100599885
##    hgnc_symbol                                       description CBO_min
## 1:      TSPAN6 tetraspanin 6 [Source:HGNC Symbol;Acc:HGNC:11858]     744
## 2:        TNMD   tenomodulin [Source:HGNC Symbol;Acc:HGNC:17757]       0
##    CBO_median CBO_q75 CBO_max
## 1:     2644.0 3235.00    5449
## 2:        9.5   20.75      97
gene_info[which(!gene_info$ensembl_gene_id%in%gene_anno$ensembl_gene_id)]
## Empty data.table (0 rows and 12 cols): ensembl_gene_id,geneId,chr,strand,start,end...
gene_info[gene_info$CBO_min > 2e4, c("CBO_min", "hgnc_symbol")]
##    CBO_min hgnc_symbol
## 1:   22057        ACTB
## 2:   23713     HNRNPA1
## 3:   66180      EEF1A1
## 4:   23858        TUBB
## 5:   88639      MT-CO1
## 6:   29947      MT-ND4
## 7:   39231      MT-CO3
## 8:   88969      MALAT1
gene_info$gene_symbol = gene_info$hgnc_symbol

table(gene_info$gene_symbol == "")
## 
## FALSE  TRUE 
## 19181  4959
table(is.na(gene_info$gene_symbol))
## 
## FALSE  TRUE 
## 24140    62
wEns = which(gene_info$gene_symbol == "" | is.na(gene_info$gene_symbol))
gene_info$gene_symbol[wEns] = gene_info$ensembl_gene_id[wEns]

table(gene_info$gene_symbol == "")
## 
## FALSE 
## 24202
table(is.na(gene_info$gene_symbol))
## 
## FALSE 
## 24202

Compare the number of DE genes (different ko strategies)

For this dataset, there is only one model system, and for each day group and each knockout gene, there are three knockout strategies.

Do it separately by timepoint (day).

day_group_strings = c("3_4", "5", "6", "7", "8", "9")

# define a function to extract the model_daygroup_gene part of the result filenames
# under the assumption that the desired part is the one before the 
# second-to-last underscore
extract_model_day_group <- function(x) {
  parts <- unlist(str_split(x, "_"))
  return(paste(parts[1:(length(parts) - 2)], collapse = "_"))
}

for (day_group in day_group_strings){
  
  res = fread(paste0("results/2024_06_JAX_RNAseq_Neuroectoderm/",
                     "2024_06_JAX_RNAseq_Neuroectoderm_CBO_", 
                     day_group, "_DEseq2.tsv"), data.table = FALSE)
  print(res[1:2,1:7])

  items = names(res)[grep("_padj", names(res))]
  items = unique(sapply(items, extract_model_day_group))
  items

  fc0 = log2(1.5)
  p0  = 0.05

  gene_symbols = str_extract(items, "[a-zA-Z0-9]+$")
  gene_symbols

  table(gene_symbols %in% gene_info$gene_symbol)

  df = data.frame(KO = items, gene_symbol = gene_symbols, 
                  CE_nDE = NA, KO_nDE = NA, PTC_nDE = NA, 
                  CE_padj = NA, CE_log2FoldChange = NA, 
                  KO_padj = NA, KO_log2FoldChange = NA, 
                  PTC_padj = NA, PTC_log2FoldChange = NA)

  df$Model = str_extract(df$KO, "^[a-zA-Z0-9]+")

  gene_ids = gene_info$ensembl_gene_id[match(gene_symbols, gene_info$gene_symbol)]

  for(k in 1:length(items)){
    it_k = items[k]
    gene_id = gene_ids[k]
    w_gene = which(res$gene_ID == gene_id)
    stopifnot(length(w_gene) == 1)
    
    for(ko1 in c("CE", "KO", "PTC")){
      fc = paste0(it_k, "_", ko1, "_log2FoldChange")
      padj = paste0(it_k, "_", ko1, "_padj")
      col_name = paste0(ko1, "_nDE")
      df[k,col_name] = sum(abs(res[[fc]]) > fc0 & res[[padj]] < p0, na.rm = TRUE)
      
      col_name = paste0(ko1, "_log2FoldChange")
      df[k,col_name] = res[[fc]][w_gene]
      
      col_name = paste0(ko1, "_padj")
      df[k,col_name] = res[[padj]][w_gene]
      
    }
  }

  print(df)

  p1 <- ggplot(df, aes(x=(PTC_nDE), y=(CE_nDE), 
                 label=gene_symbol)) + 
        geom_point() + scale_shape_manual(values = c(7, 16)) + 
        geom_text_repel() + ggtitle("# of DE genes") + 
        labs(x="PTC", y="CE") + 
        geom_abline(intercept = 0, slope = 1, color = "orange", linetype = "dashed")

  p2 <- ggplot(df, aes(x=(PTC_nDE), y=(KO_nDE),  
                 label=gene_symbol)) + 
        geom_point() + scale_shape_manual(values = c(7, 16)) + 
        geom_text_repel() + ggtitle("# of DE genes") + 
        labs(x="PTC", y="KO") + 
        geom_abline(intercept = 0, slope = 1, color = "orange", linetype = "dashed")
  
  p3 <- ggplot(df, aes(x=CE_log2FoldChange, y=-log10(CE_padj), 
                 label=gene_symbol)) + 
        geom_point(size=2) + scale_shape_manual(values = c(7, 16)) + 
        geom_text_repel() + ggtitle("DE results of the KO genes, CE") + 
        labs(x="log2 fold change", y="-log10(p-value)")  + 
        geom_hline(yintercept = 2, color = "grey", linetype = "dashed") + 
        geom_vline(xintercept = c(log2(1/1.5), log2(1.5)), color = "grey",
                   linetype = "dashed")
  
  p4 <- ggplot(df, aes(x=KO_log2FoldChange, y=-log10(KO_padj), 
                 label=gene_symbol)) + 
        geom_point(size=2) + scale_shape_manual(values = c(7, 16)) + 
        geom_text_repel() + ggtitle("DE results of the KO genes, KO") + 
        labs(x="log2 fold change", y="-log10(p-value)")  + 
        geom_hline(yintercept = 2, color = "grey", linetype = "dashed") + 
        geom_vline(xintercept = c(log2(1/1.5), log2(1.5)), color = "grey",
                   linetype = "dashed")
  
  p5 <- ggplot(df, aes(x=PTC_log2FoldChange, y=-log10(PTC_padj), 
                 label=gene_symbol)) + 
        geom_point(size=2) + scale_shape_manual(values = c(7, 16)) + 
        geom_text_repel() + ggtitle("DE results of the KO genes, PTC") + 
        labs(x="log2 fold change", y="-log10(p-value)")  + 
        geom_hline(yintercept = 2, color = "grey", linetype = "dashed") + 
        geom_vline(xintercept = c(log2(1/1.5), log2(1.5)), color = "grey",
                   linetype = "dashed")

 g_combined <- ggarrange(p1, p2, p3, p4, p5, ncol = 2, nrow = 3)
 print(annotate_figure(g_combined,
                      top = text_grob(paste0("CBO, day group ", day_group), 
                                      face = "bold", size = 16)))
  
}
##           gene_ID CBO_3_4_LHX5_CE_baseMean CBO_3_4_LHX5_CE_log2FoldChange
## 1 ENSG00000271254                  1330.00                         0.0349
## 2 ENSG00000276345                     2.54                         0.9730
##   CBO_3_4_LHX5_CE_lfcSE CBO_3_4_LHX5_CE_stat CBO_3_4_LHX5_CE_pvalue
## 1                 0.171                0.204                  0.838
## 2                 2.370                0.410                  0.682
##   CBO_3_4_LHX5_CE_padj
## 1                    1
## 2                    1
##             KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1 CBO_3_4_LHX5        LHX5      1      4       5       1             0.366
##   KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1       1             -1.16        1              -2.08   CBO
##           gene_ID CBO_5_LHX5_CE_baseMean CBO_5_LHX5_CE_log2FoldChange
## 1 ENSG00000271254                1180.00                        0.216
## 2 ENSG00000276345                   1.45                        2.600
##   CBO_5_LHX5_CE_lfcSE CBO_5_LHX5_CE_stat CBO_5_LHX5_CE_pvalue
## 1               0.123              1.760               0.0788
## 2               3.410              0.761               0.4470
##   CBO_5_LHX5_CE_padj
## 1              0.698
## 2                 NA
##            KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1  CBO_5_LHX5        LHX5      2    172       1    0.96             0.101
## 2 CBO_5_FEZF2       FEZF2    182      9      22      NA            -4.640
##   KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1   0.978           -0.0183   0.0949              -1.06   CBO
## 2      NA           -5.6900       NA              -1.05   CBO
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_text_repel()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_text_repel()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_text_repel()`).

##           gene_ID CBO_6_LHX5_CE_baseMean CBO_6_LHX5_CE_log2FoldChange
## 1 ENSG00000271254                 866.00                        0.244
## 2 ENSG00000276345                   1.19                       12.300
##   CBO_6_LHX5_CE_lfcSE CBO_6_LHX5_CE_stat CBO_6_LHX5_CE_pvalue
## 1               0.144               1.69              0.09080
## 2               3.210               3.83              0.00013
##   CBO_6_LHX5_CE_padj
## 1             0.8980
## 2             0.0286
##            KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1  CBO_6_LHX5        LHX5    100     22      32   0.545            -1.290
## 2 CBO_6_FEZF2       FEZF2     37     32      41      NA            -2.130
## 3  CBO_6_RFX4        RFX4     45     48      41   1.000            -0.636
##   KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1 1.00000            -0.427        1            -0.3620   CBO
## 2 0.00326            -6.270        1            -0.0372   CBO
## 3 0.95800             0.558        1            -0.8610   CBO
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Removed 1 rows containing missing values (`geom_text_repel()`).

##           gene_ID CBO_7_FEZF2_CE_baseMean CBO_7_FEZF2_CE_log2FoldChange
## 1 ENSG00000271254                    1580                       -0.1590
## 2 ENSG00000277196                    1300                       -0.0632
##   CBO_7_FEZF2_CE_lfcSE CBO_7_FEZF2_CE_stat CBO_7_FEZF2_CE_pvalue
## 1                0.179              -0.892                 0.372
## 2                0.281              -0.225                 0.822
##   CBO_7_FEZF2_CE_padj
## 1               0.686
## 2               0.935
##            KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1 CBO_7_FEZF2       FEZF2      1      5       2      NA            -0.296
## 2  CBO_7_PAX6        PAX6      1      1       1       1            -0.567
## 3  CBO_7_RFX4        RFX4      3      0       0       1            -1.290
## 4  CBO_7_OTX1        OTX1      4      1       5       1             0.606
##    KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1 2.63e-06            -4.010        1             1.6100   CBO
## 2 1.00e+00             1.440        1            -1.2700   CBO
## 3 1.00e+00            -0.839        1            -0.9720   CBO
## 4 1.00e+00             0.468        1            -0.0452   CBO
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Removed 1 rows containing missing values (`geom_text_repel()`).

##           gene_ID CBO_8_RFX4_CE_baseMean CBO_8_RFX4_CE_log2FoldChange
## 1 ENSG00000271254                   1320                        0.143
## 2 ENSG00000277196                    370                       -0.494
##   CBO_8_RFX4_CE_lfcSE CBO_8_RFX4_CE_stat CBO_8_RFX4_CE_pvalue
## 1               0.158              0.908               0.3640
## 2               0.266             -1.850               0.0636
##   CBO_8_RFX4_CE_padj
## 1                  1
## 2                  1
##            KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1  CBO_8_RFX4        RFX4     74     70      72   1.000            -1.040
## 2  CBO_8_OTX1        OTX1     37     84      47   0.267             1.300
## 3 CBO_8_FEZF2       FEZF2     37     71      64   1.000            -0.341
##    KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1 1.00e+00            -0.419    1.000             -1.360   CBO
## 2 6.69e-01             0.719    1.000              0.328   CBO
## 3 1.12e-25            -3.710    0.102              0.808   CBO

##           gene_ID CBO_9_RFX4_CE_baseMean CBO_9_RFX4_CE_log2FoldChange
## 1 ENSG00000271254                   1470                        0.291
## 2 ENSG00000277196                    163                       -0.031
##   CBO_9_RFX4_CE_lfcSE CBO_9_RFX4_CE_stat CBO_9_RFX4_CE_pvalue
## 1               0.163              1.780               0.0745
## 2               0.232             -0.134               0.8940
##   CBO_9_RFX4_CE_padj
## 1                  1
## 2                  1
##           KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1 CBO_9_RFX4        RFX4     59     79      54       1            -1.150
## 2 CBO_9_OTX1        OTX1     52     70      61       1             0.614
##   KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1   0.273            -2.660        1              -1.22   CBO
## 2   1.000             0.447        1               0.38   CBO

Generate valcano plots

In some (day group, knockout gene, knockout strategy) combinations, there are too many DE genes and they cannot all be labeled in the valcano plots.

for (day_group in day_group_strings){
  
  # create a list to store DE gene set for the gene set enrichment analysis
  de_gene_sets = list() 
  
  print(day_group)
  
  res = fread(paste0("results/2024_06_JAX_RNAseq_Neuroectoderm/",
                     "2024_06_JAX_RNAseq_Neuroectoderm_CBO_", 
                     day_group, "_DEseq2.tsv"), data.table = FALSE)
  print(res[1:2,1:7])

  items = names(res)[grep("_padj", names(res))]
  items

  for(it1 in items){
    id1 = gsub("_padj", "", it1)
    
    res_k = res[,c(1, grep(id1, names(res)))]
    names(res_k) = gsub(paste0(id1, "_"), "", names(res_k))
    
    ww_up = which((res_k$log2FoldChange > log2(1.5)) & (res_k$padj < 0.05))
    ww_down = which((res_k$log2FoldChange < -log2(1.5)) & (res_k$padj < 0.05))

    de_gene_sets[[paste0(id1, "_up")]] = res_k[ww_up,]$gene_ID
    de_gene_sets[[paste0(id1, "_down")]] = res_k[ww_down,]$gene_ID
  
    res_k$DE = "No"
    res_k$DE[ww_up] <- "Up"
    res_k$DE[ww_down] <- "Down"
    
    res_k$log2FoldChange[which(res_k$log2FoldChange > 3)] = 3
    res_k$log2FoldChange[which(res_k$log2FoldChange < -3)] = -3
  
    col2use = c("blue", "grey", "red")
    names(col2use) = c("Down", "No", "Up")
    
    res_k$delabel = NA
    res_k$gene_symbol = gene_info$gene_symbol[match(res_k$gene_ID, 
                                                    gene_info$ensembl_gene_id)]
    w_de = which(res_k$DE != "No")
    res_k$delabel[w_de] = res_k$gene_symbol[w_de]
  
    print(table(res_k$DE))
    
    print("--------------------------------------------------")
    print(paste0("CBO, day group ", day_group))
    print("--------------------------------------------------")
    
    g2 = ggplot(res_k, aes(x=log2FoldChange, y=-log10(padj), 
                    col=DE, label=delabel)) + 
         geom_point() + ggtitle(id1) + 
         geom_text_repel(point.padding = 0.5,
                         min.segment.length = 0, 
                         size = 3,  # Adjust text size
                         box.padding = 0.5,
                         max.overlaps = 20, 
                         show.legend = FALSE) + 
         scale_color_manual(values=col2use)
        
    print(g2)
  }

  # save out the DE gene lists for gene set enrichment analysis
  output_dir = "results/2024_06_JAX_RNAseq_Neuroectoderm_DE_gene_lists"
    
  if (!file.exists(output_dir)){
    dir.create(output_dir)
  }
  
  saveRDS(de_gene_sets, 
          sprintf("%s/2024_06_JAX_RNAseq_Neuroectoderm_CBO_%s_DE_gene_list.rds", 
                  output_dir, day_group))

}
## [1] "3_4"
##           gene_ID CBO_3_4_LHX5_CE_baseMean CBO_3_4_LHX5_CE_log2FoldChange
## 1 ENSG00000271254                  1330.00                         0.0349
## 2 ENSG00000276345                     2.54                         0.9730
##   CBO_3_4_LHX5_CE_lfcSE CBO_3_4_LHX5_CE_stat CBO_3_4_LHX5_CE_pvalue
## 1                 0.171                0.204                  0.838
## 2                 2.370                0.410                  0.682
##   CBO_3_4_LHX5_CE_padj
## 1                    1
## 2                    1
## 
##    No    Up 
## 23241     1 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 3_4"
## [1] "--------------------------------------------------"
## Warning: Removed 36 rows containing missing values (`geom_point()`).
## Warning: Removed 23241 rows containing missing values (`geom_text_repel()`).

## 
##  Down    No 
##     4 23238 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 3_4"
## [1] "--------------------------------------------------"
## Warning: Removed 36 rows containing missing values (`geom_point()`).
## Warning: Removed 23238 rows containing missing values (`geom_text_repel()`).

## 
##  Down    No 
##     5 23237 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 3_4"
## [1] "--------------------------------------------------"
## Warning: Removed 36 rows containing missing values (`geom_point()`).
## Warning: Removed 23237 rows containing missing values (`geom_text_repel()`).

## [1] "5"
##           gene_ID CBO_5_LHX5_CE_baseMean CBO_5_LHX5_CE_log2FoldChange
## 1 ENSG00000271254                1180.00                        0.216
## 2 ENSG00000276345                   1.45                        2.600
##   CBO_5_LHX5_CE_lfcSE CBO_5_LHX5_CE_stat CBO_5_LHX5_CE_pvalue
## 1               0.123              1.760               0.0788
## 2               3.410              0.761               0.4470
##   CBO_5_LHX5_CE_padj
## 1              0.698
## 2                 NA
## 
##  Down    No 
##     2 23565 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 5"
## [1] "--------------------------------------------------"
## Warning: Removed 5034 rows containing missing values (`geom_point()`).
## Warning: Removed 23565 rows containing missing values (`geom_text_repel()`).

## 
##  Down    No    Up 
##    13 23395   159 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 5"
## [1] "--------------------------------------------------"
## Warning: Removed 12793 rows containing missing values (`geom_point()`).
## Warning: Removed 23395 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 157 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No 
##     1 23566 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 5"
## [1] "--------------------------------------------------"
## Warning: Removed 8 rows containing missing values (`geom_point()`).
## Warning: Removed 23566 rows containing missing values (`geom_text_repel()`).

## 
##  Down    No    Up 
##    54 23385   128 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 5"
## [1] "--------------------------------------------------"
## Warning: Removed 10510 rows containing missing values (`geom_point()`).
## Warning: Removed 23385 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 161 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##     2 23558     7 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 5"
## [1] "--------------------------------------------------"
## Warning: Removed 14164 rows containing missing values (`geom_point()`).
## Warning: Removed 23558 rows containing missing values (`geom_text_repel()`).

## 
##  Down    No    Up 
##     4 23545    18 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 5"
## [1] "--------------------------------------------------"
## Warning: Removed 12338 rows containing missing values (`geom_point()`).
## Warning: Removed 23545 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "6"
##           gene_ID CBO_6_LHX5_CE_baseMean CBO_6_LHX5_CE_log2FoldChange
## 1 ENSG00000271254                 866.00                        0.244
## 2 ENSG00000276345                   1.19                       12.300
##   CBO_6_LHX5_CE_lfcSE CBO_6_LHX5_CE_stat CBO_6_LHX5_CE_pvalue
## 1               0.144               1.69              0.09080
## 2               3.210               3.83              0.00013
##   CBO_6_LHX5_CE_padj
## 1             0.8980
## 2             0.0286
## 
##  Down    No    Up 
##    27 23811    73 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 6"
## [1] "--------------------------------------------------"
## Warning: Removed 23811 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 67 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##     9 23889    13 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 6"
## [1] "--------------------------------------------------"
## Warning: Removed 23889 rows containing missing values (`geom_text_repel()`).

## 
##  Down    No    Up 
##    14 23879    18 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 6"
## [1] "--------------------------------------------------"
## Warning: Removed 23879 rows containing missing values (`geom_text_repel()`).

## 
##  Down    No    Up 
##    15 23874    22 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 6"
## [1] "--------------------------------------------------"
## Warning: Removed 9735 rows containing missing values (`geom_point()`).
## Warning: Removed 23874 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    12 23879    20 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 6"
## [1] "--------------------------------------------------"
## Warning: Removed 23879 rows containing missing values (`geom_text_repel()`).

## 
##  Down    No    Up 
##    16 23870    25 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 6"
## [1] "--------------------------------------------------"
## Warning: Removed 23870 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    18 23866    27 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 6"
## [1] "--------------------------------------------------"
## Warning: Removed 23866 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 16 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##     9 23863    39 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 6"
## [1] "--------------------------------------------------"
## Warning: Removed 23863 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 22 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##     8 23870    33 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 6"
## [1] "--------------------------------------------------"
## Warning: Removed 23870 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 20 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "7"
##           gene_ID CBO_7_FEZF2_CE_baseMean CBO_7_FEZF2_CE_log2FoldChange
## 1 ENSG00000271254                    1580                       -0.1590
## 2 ENSG00000277196                    1300                       -0.0632
##   CBO_7_FEZF2_CE_lfcSE CBO_7_FEZF2_CE_stat CBO_7_FEZF2_CE_pvalue
## 1                0.179              -0.892                 0.372
## 2                0.281              -0.225                 0.822
##   CBO_7_FEZF2_CE_padj
## 1               0.686
## 2               0.935
## 
##  Down    No 
##     1 23664 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 7"
## [1] "--------------------------------------------------"
## Warning: Removed 14223 rows containing missing values (`geom_point()`).
## Warning: Removed 23664 rows containing missing values (`geom_text_repel()`).

## 
##  Down    No    Up 
##     3 23660     2 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 7"
## [1] "--------------------------------------------------"
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 23660 rows containing missing values (`geom_text_repel()`).

## 
##  Down    No    Up 
##     1 23663     1 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 7"
## [1] "--------------------------------------------------"
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 23663 rows containing missing values (`geom_text_repel()`).

## 
##    No    Up 
## 23664     1 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 7"
## [1] "--------------------------------------------------"
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 23664 rows containing missing values (`geom_text_repel()`).

## 
##    No    Up 
## 23664     1 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 7"
## [1] "--------------------------------------------------"
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Removed 23664 rows containing missing values (`geom_text_repel()`).

## 
##    No    Up 
## 23664     1 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 7"
## [1] "--------------------------------------------------"
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Removed 23664 rows containing missing values (`geom_text_repel()`).

## 
##  Down    No    Up 
##     1 23662     2 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 7"
## [1] "--------------------------------------------------"
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 23662 rows containing missing values (`geom_text_repel()`).

## 
##    No 
## 23665 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 7"
## [1] "--------------------------------------------------"
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 23665 rows containing missing values (`geom_text_repel()`).

## 
##    No 
## 23665 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 7"
## [1] "--------------------------------------------------"
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Removed 23665 rows containing missing values (`geom_text_repel()`).

## 
##  Down    No    Up 
##     1 23661     3 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 7"
## [1] "--------------------------------------------------"
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 23661 rows containing missing values (`geom_text_repel()`).

## 
##    No    Up 
## 23664     1 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 7"
## [1] "--------------------------------------------------"
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 23664 rows containing missing values (`geom_text_repel()`).

## 
##  Down    No    Up 
##     1 23660     4 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 7"
## [1] "--------------------------------------------------"
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 23660 rows containing missing values (`geom_text_repel()`).

## [1] "8"
##           gene_ID CBO_8_RFX4_CE_baseMean CBO_8_RFX4_CE_log2FoldChange
## 1 ENSG00000271254                   1320                        0.143
## 2 ENSG00000277196                    370                       -0.494
##   CBO_8_RFX4_CE_lfcSE CBO_8_RFX4_CE_stat CBO_8_RFX4_CE_pvalue
## 1               0.158              0.908               0.3640
## 2               0.266             -1.850               0.0636
##   CBO_8_RFX4_CE_padj
## 1                  1
## 2                  1
## 
##  Down    No    Up 
##    23 23194    51 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 8"
## [1] "--------------------------------------------------"
## Warning: Removed 23194 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 25 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    19 23198    51 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 8"
## [1] "--------------------------------------------------"
## Warning: Removed 23198 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 28 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    26 23196    46 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 8"
## [1] "--------------------------------------------------"
## Warning: Removed 23196 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 39 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    14 23231    23 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 8"
## [1] "--------------------------------------------------"
## Warning: Removed 23231 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    35 23184    49 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 8"
## [1] "--------------------------------------------------"
## Warning: Removed 23184 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 48 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    23 23221    24 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 8"
## [1] "--------------------------------------------------"
## Warning: Removed 23221 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    10 23231    27 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 8"
## [1] "--------------------------------------------------"
## Warning: Removed 23231 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 10 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    48 23197    23 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 8"
## [1] "--------------------------------------------------"
## Warning: Removed 23197 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 56 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    40 23204    24 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 8"
## [1] "--------------------------------------------------"
## Warning: Removed 23204 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 38 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "9"
##           gene_ID CBO_9_RFX4_CE_baseMean CBO_9_RFX4_CE_log2FoldChange
## 1 ENSG00000271254                   1470                        0.291
## 2 ENSG00000277196                    163                       -0.031
##   CBO_9_RFX4_CE_lfcSE CBO_9_RFX4_CE_stat CBO_9_RFX4_CE_pvalue
## 1               0.163              1.780               0.0745
## 2               0.232             -0.134               0.8940
##   CBO_9_RFX4_CE_padj
## 1                  1
## 2                  1
## 
##  Down    No    Up 
##    11 22864    48 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 9"
## [1] "--------------------------------------------------"
## Warning: Removed 22864 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 32 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    23 22844    56 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 9"
## [1] "--------------------------------------------------"
## Warning: Removed 22844 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 33 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    17 22869    37 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 9"
## [1] "--------------------------------------------------"
## Warning: Removed 22869 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 26 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    13 22871    39 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 9"
## [1] "--------------------------------------------------"
## Warning: Removed 22871 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 28 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    25 22853    45 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 9"
## [1] "--------------------------------------------------"
## Warning: Removed 22853 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 25 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 
##  Down    No    Up 
##    21 22862    40 
## [1] "--------------------------------------------------"
## [1] "CBO, day group 9"
## [1] "--------------------------------------------------"
## Warning: Removed 22862 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 31 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

gc()
##            used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  7652511 408.7   12436565 664.2         NA 12436565 664.2
## Vcells 22994134 175.5   56893558 434.1      65536 56893558 434.1
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] readxl_1.4.2                UpSetR_1.4.0               
##  [3] ComplexHeatmap_2.14.0       data.table_1.14.8          
##  [5] dplyr_1.1.2                 ggpointdensity_0.1.0       
##  [7] viridis_0.6.2               viridisLite_0.4.1          
##  [9] DESeq2_1.38.3               SummarizedExperiment_1.28.0
## [11] Biobase_2.58.0              MatrixGenerics_1.10.0      
## [13] matrixStats_1.0.0           GenomicRanges_1.50.2       
## [15] GenomeInfoDb_1.34.9         IRanges_2.32.0             
## [17] S4Vectors_0.36.2            BiocGenerics_0.44.0        
## [19] RColorBrewer_1.1-3          MASS_7.3-58.2              
## [21] stringr_1.5.0               ggpubr_0.6.0               
## [23] ggrepel_0.9.3               ggplot2_3.4.2              
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7           bit64_4.0.5            doParallel_1.0.17     
##  [4] httr_1.4.6             tools_4.2.3            backports_1.4.1       
##  [7] bslib_0.4.2            utf8_1.2.3             R6_2.5.1              
## [10] DBI_1.1.3              colorspace_2.1-0       GetoptLong_1.0.5      
## [13] withr_2.5.0            tidyselect_1.2.0       gridExtra_2.3         
## [16] bit_4.0.5              compiler_4.2.3         cli_3.6.1             
## [19] DelayedArray_0.24.0    labeling_0.4.2         sass_0.4.5            
## [22] scales_1.2.1           digest_0.6.31          rmarkdown_2.21        
## [25] XVector_0.38.0         pkgconfig_2.0.3        htmltools_0.5.5       
## [28] fastmap_1.1.1          GlobalOptions_0.1.2    rlang_1.1.0           
## [31] rstudioapi_0.14        RSQLite_2.3.1          farver_2.1.1          
## [34] shape_1.4.6            jquerylib_0.1.4        generics_0.1.3        
## [37] jsonlite_1.8.4         BiocParallel_1.32.6    car_3.1-2             
## [40] RCurl_1.98-1.12        magrittr_2.0.3         GenomeInfoDbData_1.2.9
## [43] Matrix_1.6-4           Rcpp_1.0.10            munsell_0.5.0         
## [46] fansi_1.0.4            abind_1.4-5            lifecycle_1.0.3       
## [49] stringi_1.7.12         yaml_2.3.7             carData_3.0-5         
## [52] zlibbioc_1.44.0        plyr_1.8.8             blob_1.2.4            
## [55] parallel_4.2.3         crayon_1.5.2           lattice_0.20-45       
## [58] cowplot_1.1.1          Biostrings_2.66.0      annotate_1.76.0       
## [61] circlize_0.4.15        KEGGREST_1.38.0        locfit_1.5-9.8        
## [64] knitr_1.44             pillar_1.9.0           rjson_0.2.21          
## [67] ggsignif_0.6.4         geneplotter_1.76.0     codetools_0.2-19      
## [70] XML_3.99-0.14          glue_1.6.2             evaluate_0.20         
## [73] foreach_1.5.2          vctrs_0.6.2            png_0.1-8             
## [76] cellranger_1.1.0       gtable_0.3.3           purrr_1.0.1           
## [79] tidyr_1.3.0            clue_0.3-65            cachem_1.0.7          
## [82] xfun_0.39              xtable_1.8-4           broom_1.0.4           
## [85] rstatix_0.7.2          tibble_3.2.1           iterators_1.0.14      
## [88] AnnotationDbi_1.60.2   memoise_2.0.1          cluster_2.1.4